# import numpy as np
# import pandas as pd
# arr = np.random.randint(1,100,5)
# print('一堆原始数据:\n',arr)
# print('等差分段离散化数据:\n',pd.cut(arr,bins=5))
# print('自定义分段离散化数据:\n',pd.cut(arr,bins=[0,20,40,60,80,100]))
# print('自定义分段离散化数据,并设置分段标签:\n',pd.cut(arr,bins=[0,20,40,60,80,100],labels=['0+','20+','40+','60+','80+']))
# import pandas as pd
# pd.set_option('display.unicode.east_asian_width',True)
# df = pd.read_excel('student_info.xlsx',index_col=0)
# print('原始数据:\n',df)
# df['体质指数']=df['体重（kg）']/df['身高（m）']**2
# df['健康状况']=pd.cut(df['体质指数'],bins=[0,18.5,24,28,50],right=False,include_lowest=True,labels=['消瘦','正常','超重','肥胖'])
# print('计算并离散化体质指数后的数据:\n',df)
# print('对性别进行编码,并设置附加前缀及其连接符为空的数据:\n',pd.get_dummies(df,prefix='',prefix_sep='',columns=['性别']))
import pandas as pd
# Use East Asian character widths when computing display text width, so the
# CJK column headers printed below line up with their values.
pd.set_option('display.unicode.east_asian_width', True)

# The same calendar date written in five different textual formats.
raw_times = [
    '02/28/2022 12:23:21',
    '2022.02.28',
    '2022/02/28',
    '20220228',
    '28-Feb-2022',
]
df1 = pd.DataFrame({'原时间信息': raw_times})
# format='mixed' lets pandas infer the format of each element individually
# instead of assuming one shared layout for the whole column.
df1['转换后的时间'] = pd.to_datetime(df1['原时间信息'], format='mixed')
print('时间的转换:\n', df1)
# Date/time components stored as strings, one column per unit. These column
# names are the ones pd.to_datetime uses to assemble timestamps from a frame.
df2 = pd.DataFrame({
    'year': ['2020', '2021', '2022'],
    'month': ['1', '6', '12'],
    'day': ['1', '30', '31'],
    'hour': ['1', '13', '18'],
    'minute': ['1', '14', '30'],
    'second': ['1', '0', '0'],
})
# Combine the unit columns row by row into a single datetime column.
df2['组合后的时间'] = pd.to_datetime(df2)
print('时间组合:\n', df2)

# Break the combined timestamps back down into individual fields via the
# .dt accessor.
df3 = df2['组合后的时间']
df4 = pd.DataFrame({
    '年': df3.dt.year,
    '月': df3.dt.month,
    '日': df3.dt.day,
    '时': df3.dt.hour,
    '分': df3.dt.minute,
    '秒': df3.dt.second,
    # Day of the week, Monday=0 ... Sunday=6. This column was previously
    # filled from dt.hour, which merely duplicated the '时' column.
    '星期': df3.dt.dayofweek,
    '季度': df3.dt.quarter,
    '是否年底': df3.dt.is_year_end,
    '是否月底': df3.dt.is_month_end,
})
print('时间的提取:\n', df4)